%CODE FOR PART1 OF SS 2015;
close all; clear; clc;



%% Start Here: Setup working folder, seed, and options;
    % Set Seed;
state=1991;
rand('state',state);
randn('state',state);
stream1= RandStream('mt19937ar','Seed', 1);
RandStream.setGlobalStream(stream1);

%% 0. Import Data and Definitions;
    
    %Import Shotts (2003), current DW nominate scores, and Bonica CF scores;
data_scores=importdata('dwnominate_07162015.csv',',',1);

dw_congress=cellfun(@str2num,data_scores.textdata(2:length(data_scores.textdata),1));
dw_state=cellfun(@str2num,data_scores.textdata(2:length(data_scores.textdata),3));
dw_shotts=data_scores.data(:,1);
dw_current=data_scores.data(:,3);
dw_se=data_scores.data(:,5);
cf_constant=data_scores.data(:,7);
cf_congress=data_scores.data(:,8);

    %Import original regression data from Shotts (2003)- at state level;
S=250;
N=50;

data_shotts=importdata('data_shotts2003.txt');

reg_congress=data_shotts.data(:,1);
reg_state=data_shotts.data(:,2);
districts=data_shotts.data(:,12);

    % Create variables needed for original regression;
libs=data_shotts.data(:,14)./districts;
c=ones(S,1);
stfracmm= data_shotts.data(:,13)./districts;

demprev=data_shotts.data(:,3);
repprev=data_shotts.data(:,4);
demnext=data_shotts.data(:,5);
repnext=data_shotts.data(:,6);

natdprev=data_shotts.data(:,7);
natrprev=data_shotts.data(:,8);
natdnext=data_shotts.data(:,9);
natrnext=data_shotts.data(:,10);

votlib2=((demprev+demnext)./(demprev+demnext+repprev+repnext))-((natdprev+natdnext)./(natdprev+natdnext+natrprev+natrnext));

blackpop=data_shotts.data(:,29);
hispop=data_shotts.data(:,30);

demger=(data_shotts.data(:,11)==-1);

libslag=data_shotts.data(:,15);
south=data_shotts.data(:,16);

    %South sample vector to limit sample just to Southern legislators;
south_sample=find(south==1);
    
    %Manipulation of Shotts (2003) DW Nominate Scores for regression;
[libs_shotts,libslag_shotts]=create_liberal( reg_congress,reg_state,dw_congress,dw_state,dw_shotts);

    %Data to be graphed- southern states for 102,103,104 congresses, and diff. in scores;
reg_state_south=unique(reg_state(south_sample,:));
state_graph=zeros(length(dw_state),1);
for i=1:length(reg_state_south);
    temp=(dw_state==reg_state_south(i));
    state_graph=state_graph+temp;
end

    % Difference in scores between Shotts and most up to date version;
diff_scores=abs(dw_shotts-dw_current);
diff_scores_south=abs(dw_shotts(state_graph==1)-dw_current(state_graph==1));

data_graph=[dw_congress,dw_state,dw_shotts,dw_current];
data_graph=data_graph(state_graph==1,:);

%% Part 1.1. Graph Old and New DW Nominate Scores for Southern States,102-104 Congresses;
    % Calculate Congress Medians for Congresses to be graphed;
med_shotts=zeros(3,1);
med_current=zeros(3,1);

for i=1:3;
    med_shotts(i)=median(dw_shotts(dw_congress==i+101));
    med_current(i)=median(dw_current(dw_congress==i+101));
end

congress_102=find(data_graph(:,1)==102);
congress_103=find(data_graph(:,1)==103);
congress_104=find(data_graph(:,1)==104);

    % Graph formating: definitions for font, colours, etc.;
grey = [0.3,0.3,0.3];
markersize_scores=40;
fontsize_x=30;
linewidth=2;

    % Excluded members- members with scores not to be included in graphs;
graph_extreme= 0.301;
left_shotts102=sum(data_graph(congress_102,3)<-graph_extreme);
right_shotts102=sum(data_graph(congress_102,3)>graph_extreme);

left_current102=sum(data_graph(congress_102,4)<-graph_extreme);
right_current102=sum(data_graph(congress_102,4)>graph_extreme);

left_shotts103=sum(data_graph(congress_103,3)<-graph_extreme);
right_shotts103=sum(data_graph(congress_103,3)>graph_extreme);

left_current103=sum(data_graph(congress_103,4)<-graph_extreme);
right_current103=sum(data_graph(congress_103,4)>graph_extreme);

left_shotts104=sum(data_graph(congress_104,3)<-graph_extreme);
right_shotts104=sum(data_graph(congress_104,3)>0.301);

left_current104=sum(data_graph(congress_104,4)<-graph_extreme);
right_current104=sum(data_graph(congress_104,4)>graph_extreme);

    % Graphs for Old Scores;
        %102;
clf;
figure('units','normalized','position',[.1 .1 1 1]);
subplot(3,1,1);
line([med_shotts(1) med_shotts(1)], [-1 1], 'LineStyle','--','Color', 'r','LineWidth',linewidth+1);
        %Here I create the marker (line) for each congress member to be
        %plotted;
for i=1:length(congress_102);
    line([data_graph(congress_102(i),3) data_graph(congress_102(i),3)], [-0.005 0.005], 'Color', 'k','LineWidth',linewidth)
end
set(gca,'ytick',[],'Ycolor','w','box','off', 'FontSize',fontsize_x);
xlabel('102^{th} Congress', 'FontSize',fontsize_x)
xlim([-0.301 0.301])
ylim([-0.1 0.1])
text(-0.301,0.05, sprintf('\\leftarrow %d additional members', left_shotts102),'FontSize',fontsize_x-2);
text(0.205,0.05, sprintf('%d additional members \\rightarrow ', right_shotts102),'FontSize',fontsize_x-2);
legend('\fontsize{30} \fontsize{20}House Median','\fontsize{30} \fontsize{20} Member Scores', 'Location','northoutside', 'Orientation','horizontal');

        %103;
subplot(3,1,2);
line([med_shotts(2) med_shotts(2)], [-1 1],  'LineStyle','--','Color', 'r','LineWidth',linewidth+1);
for i=1:length(congress_103);
    line([data_graph(congress_103(i),3) data_graph(congress_103(i),3)], [-0.005 0.005], 'Color', 'k','LineWidth',linewidth)
end
set(gca,'ytick',[],'Ycolor','w','box','off', 'FontSize',fontsize_x);
xlabel('103^{th} Congress', 'FontSize',fontsize_x)
xlim([-0.301 0.301])
ylim([-0.1 0.1])
text(-0.301,0.05, sprintf('\\leftarrow %d additional members', left_shotts103),'FontSize',fontsize_x-2);
text(0.205,0.05, sprintf('%d additional members \\rightarrow ', right_shotts103),'FontSize',fontsize_x-2);
legend('\fontsize{30} \fontsize{20}House Median','\fontsize{30} \fontsize{20} Member Scores', 'Location','northoutside', 'Orientation','horizontal');

        %104;
subplot(3,1,3);
line([med_shotts(3) med_shotts(3)], [-1 1],  'LineStyle','--','Color', 'r','LineWidth',linewidth+1);
for i=1:length(congress_104);
    line([data_graph(congress_104(i),3) data_graph(congress_104(i),3)], [-0.005 0.005], 'Color', 'k','LineWidth',linewidth)
end
set(gca,'ytick',[],'Ycolor','w','box','off', 'FontSize',fontsize_x);
xlabel('104^{th} Congress', 'FontSize',fontsize_x)
xlim([-0.301 0.301])
ylim([-0.1 0.1])
text(-0.301,0.05, sprintf('\\leftarrow %d additional members', left_shotts104),'FontSize',fontsize_x-2);
text(0.205,0.05, sprintf('%d additional members \\rightarrow ', right_shotts104),'FontSize',fontsize_x-2);
legend('\fontsize{30} \fontsize{20}House Median','\fontsize{30} \fontsize{20} Member Scores', 'Location','northoutside', 'Orientation','horizontal');

set(gcf,'PaperPositionMode','auto');
saveas(gcf, 'figure1_oldscores.png');

    % Graph for New Congress;
        %102;
clf;
figure('units','normalized','position',[.1 .1 1 1])
subplot(3,1,1);
line([med_current(1) med_current(1)], [-1 1],  'LineStyle','--','Color', 'r','LineWidth',linewidth+1);
for i=1:length(congress_102);
    line([data_graph(congress_102(i),4) data_graph(congress_102(i),4)], [-0.005 0.005], 'Color', 'k','LineWidth',linewidth)
end
hold on;
set(gca,'Ytick',[],'Ycolor','w','box','off', 'FontSize',fontsize_x);
xlabel('102^{th} Congress', 'FontSize',fontsize_x)
xlim([-0.3001 0.3001])
ylim([-0.1 0.1])
text(-0.301,0.05, sprintf('\\leftarrow %d additional members', left_current102),'FontSize',fontsize_x-2);
text(0.205,0.05, sprintf('%d additional members \\rightarrow', right_current102),'FontSize',fontsize_x-2);
legend('\fontsize{30} \fontsize{20}House Median','\fontsize{30} \fontsize{20} Member Scores', 'Location','northoutside', 'Orientation','horizontal');

    %103;
subplot(3,1,2);
line([med_current(2) med_current(2)], [-1 1],  'LineStyle','--','Color', 'r','LineWidth',linewidth+1);
for i=1:length(congress_103);
    line([data_graph(congress_103(i),4) data_graph(congress_103(i),4)], [-0.005 0.005], 'Color', 'k','LineWidth',linewidth)
end
hold on;
set(gca,'Ytick',[],'Ycolor','w','box','off', 'FontSize',fontsize_x);
xlabel('103^{th} Congress', 'FontSize',fontsize_x)
xlim([-0.3001 0.3001])
ylim([-0.1 0.1])
text(-0.301,0.05, sprintf('\\leftarrow %d additional members', left_current103),'FontSize',fontsize_x-2);
text(0.205,0.05, sprintf('%d additional members \\rightarrow', right_current103),'FontSize',fontsize_x-2);
legend('\fontsize{30} \fontsize{20}House Median','\fontsize{30} \fontsize{20} Member Scores', 'Location','northoutside', 'Orientation','horizontal');

    %104;
subplot(3,1,3);
line([med_current(3) med_current(3)], [-1 1],  'LineStyle','--','Color', 'r','LineWidth',linewidth+1);
for i=1:length(congress_104);
    line([data_graph(congress_104(i),4) data_graph(congress_104(i),4)], [-0.005 0.005], 'Color', 'k','LineWidth',linewidth)
end
hold on;
set(gca,'Ytick',[],'Ycolor','w','box','off', 'FontSize',fontsize_x);
xlabel('104^{th} Congress', 'FontSize',fontsize_x)
xlim([-0.3001 0.3001])
ylim([-0.1 0.1])
text(-0.301,0.05, sprintf('\\leftarrow %d additional members', left_current104),'FontSize',fontsize_x-2);
text(0.205,0.05, sprintf('%d additional members \\rightarrow', right_current104),'FontSize',fontsize_x-2);
legend('\fontsize{30} \fontsize{20}House Median','\fontsize{30} \fontsize{20} Member Scores', 'Location','northoutside', 'Orientation','horizontal');

set(gcf,'PaperPositionMode','auto');
saveas(gcf, 'figure1_currentscores.png');
close all;


%% Part 1.2. Replicate Main Regression and Parametric Procedure;
    %Definition of dependant variables for original regressions
Y=libs_shotts;
X=[c,stfracmm,votlib2,blackpop,hispop,demger,libslag_shotts];
w=districts;
k=length(X(1,:));

    %Restrict sample to southern legislators;
Y= Y(south_sample,:);
X= X(south_sample,:);
w= w(south_sample,:);

    %Weighted OLS estimates and standard errors;
[b_orig,se_orig,tstat_orig]=ols_weighted(X,Y,w);


    %Mote carlo parametric procedure drawing noise for each legislator's scores- definitions;
        %Definitions for number of iterations, noise level, and change in noise between
        %each level of standard deviation;
rep=1000;
delta=0.01;
error_max=0.2;
grid=0:delta:error_max;

alpha_montecarlo=zeros(rep,length(grid));
alphase_montecarlo=zeros(rep,length(grid));
alphatstat_montecarlo=zeros(rep,length(grid));

b_montecarlo=zeros(rep,length(grid));
se_montecarlo=zeros(rep,length(grid));
tstat_montecarlo=zeros(rep,length(grid));

    %Original results (to be ploted for noise level zero);
b_montecarlo(:,1)=b_orig(2).*ones(rep,1);
se_montecarlo(:,1)=se_orig(2).*ones(rep,1);
tstat_montecarlo(:,1)=tstat_orig(2).*ones(rep,1);

for i=2:length(grid);
    %Here we draw the noise given standard deviation grid(i);
    noise=normrnd(0,grid(i),length(dw_shotts),rep);
    for j=1:rep;
        %Add noise to scores, recalculate share of liberals- current and
        %lagged, estimate regression coefficients)
        dws_noise=dw_shotts+noise(:,j);
        
        [libs_noise,libslag_noise]=create_liberal(reg_congress,reg_state,dw_congress,dw_state,dws_noise);
    
        Y_noise=libs_noise;
        X_noise=[c,stfracmm,votlib2,blackpop,hispop,demger,libslag_noise];
        Y_noise = Y_noise(south_sample,:);
        X_noise = X_noise(south_sample,:);

        [b_noise,se_noise,tstat_noise]=ols_weighted(X_noise,Y_noise,w);
        b_montecarlo(j,i)=b_noise(2);
        se_montecarlo(j,i)=se_noise(2);
        tstat_montecarlo(j,i)=tstat_noise(2);
        
        alpha_montecarlo(j,i)=b_noise(7);
        alphase_montecarlo(j,i)=se_noise(7);
        alphatstat_montecarlo(j,i)=tstat_noise(7);
    end
end

    % Plot parametric results;
t_critical=norminv(0.975,0,1);
st_critical=tinv(0.975,N-k);
    
    %In the paper we graph the proportion of significant coefficients,
    %which are all positive occurences, and per our pre analysis plan,
    %we also specifically code the positive significant coefficients
    %from the two-tailed test.

b_pcsig=mean(abs(tstat_montecarlo)> t_critical)';

b_pcneg= mean(tstat_montecarlo<0)';
b_pcposcsig=mean(tstat_montecarlo> t_critical)';
b_pcnegsig= mean(tstat_montecarlo<-t_critical)';

b_noisemean=mean(b_montecarlo)';
se_noisemean=mean(se_montecarlo)';
tstat_noisemean=mean(tstat_montecarlo);


b_noiseup=b_noisemean+(st_critical*se_noisemean);
b_noisedown=b_noisemean-(st_critical*se_noisemean);

clf;
fontsize=14;
width_line=4;
plot(grid, b_pcsig,'k', 'LineWidth',width_line);
ylabel('Proportion Significant at 5% level', 'FontSize',fontsize)
xlabel('Standard Deviation of Error', 'FontSize',fontsize)
print('figure1_2dws','-dpng')


plot(grid, b_pcneg,'k', grid, b_pcnegsig,'r','LineWidth',width_line);
ylim([0 1])
ylabel('Proportion Negative Coefficients', 'FontSize',fontsize)
xlabel('Standard Deviation of Error', 'FontSize',fontsize)
legend('\fontsize{10} Negative Coefficients','\fontsize{10} Negative Significant Coefficients' );
print('figure1_2negative','-dpng')


f1=plot(grid,b_noisemean,'k','LineWidth',width_line);
hold on;
f2=plot(grid,b_noiseup, 'k--','LineWidth',width_line-2);
f3=plot(grid, b_noisedown, 'k--','LineWidth',width_line-2);
f4=plot(grid, zeros(length(grid),1), 'k:' );
ylim([-0.5 1.5])
ylabel('Coefficient on Fraction Majority-Minority', 'FontSize',fontsize)
xlabel('Standard Deviation of Error', 'FontSize',fontsize)
legend([f1 f2 f4],{'Mean Coefficient', 'C.I. from Mean Standard Error', 'Zero Line'}, 'Location','northoutside', 'Orientation','horizontal')
print('figure1_2coefficient','-dpng')
close all;



%% Part 1.3. Monte Carlo with Simulations with DW Standard Errors from Carroll et al;
    %Weighted OLS estimates and standard errors with current scores ;
[libs_current,libslag_current]=create_liberal( reg_congress,reg_state,dw_congress,dw_state,dw_current);

Y_current=libs_current;
X_current=[c,stfracmm,votlib2,blackpop,hispop,demger,libslag_current];

Y_current = Y_current(south_sample,:);
X_current = X_current(south_sample,:);

[b_current,se_current,tstat_current]=ols_weighted(X_current,Y_current,w);

b_current_noise=zeros(k,rep);
se_current_noise=zeros(k,rep);
tstat_current_noise=zeros(k,rep);
    
    %See to replicate results;
rng(1991);
    %Here we draw the noise for the standard errors vector from Carroll et
    %al., and reestimate the regression from Shotts.;
 for i=1:rep;
     dw_current_noise=dw_current+normrnd(0,dw_se);
     [libs_current_noise,libslag_current_noise]=create_liberal( reg_congress,reg_state,dw_congress,dw_state,dw_current_noise);
     
     Y_current_noise=libs_current_noise;
     X_current_noise=[c,stfracmm,votlib2,blackpop,hispop,demger,libslag_current_noise];
     
     Y_current_noise = Y_current_noise(south_sample,:);
     X_current_noise = X_current_noise(south_sample,:);  
     
     
     [b_current_noise(:,i), se_current_noise(:,i),tstat_current_noise(:,i)]=ols_weighted(X_current_noise,Y_current_noise,w);
 end
 
b_current_mean=mean(b_current_noise')';
tstat_sig_mean=mean((abs(tstat_current_noise')>t_critical)*100)';

negtstat_sig_mean=mean(b_current_noise(2,:)<0);

    % Create significance stars for table display;
star_orig={''};
star_current={''};

for i=1:k;
    if abs(tstat_orig(i))> norminv(0.95,0,1);
        star_orig(i)=cellstr('*');
    end
    
    if abs(tstat_orig(i))> norminv(0.975,0,1);
        star_orig(i)=cellstr('**');
    end
    
    if abs(tstat_orig(i))> norminv(0.995,0,1);
        star_orig(i)=cellstr('***');
    end
end

for i=1:k;
    if abs(tstat_current(i))> norminv(0.95,0,1);
        star_current(i)=cellstr('*');
    end
    
    if abs(tstat_current(i))> norminv(0.975,0,1);
        star_current(i)=cellstr('**');
    end
    
    if abs(tstat_current(i))> norminv(0.995,0,1);
        star_current(i)=cellstr('***');
    end
end

star_orig=star_orig';
star_current=star_current';

    %Format table 1.3. for paper;
rownames_table1_3={'Intercept'; 'Fraction Majority-Minority'; 'Unified Democratic Control'; 'Voter Liberalism'; '\% Black'; '\% Latino'; 'Lagged Left of Median'};

OriginalCoeff=strcat(num2str(round(b_orig,4)),star_orig);
OriginalSE=round(se_orig,4);
CurrentCoeff=strcat(num2str(round(b_current,4)),star_current);
CurrentSE=round(se_current,4);

table_temp=table(OriginalCoeff, OriginalSE, CurrentCoeff, CurrentSE, tstat_sig_mean, 'RowNames', rownames_table1_3);
table1_3=[table_temp(2:k,:);table_temp(1,:)];
writetable(table1_3,'table1_3.csv','Delimiter',',', 'WriteRowNames',true)

%% Part 1.4. Regression with Bonica Scores;

    % Create share of liberals, current and lagged, not with CHANGING (for each member,
    % across congresses) Bonica CF scores, setup Shotts original regression variables;
[libs_bonica1,libslag_bonica1]=create_liberal(reg_congress,reg_state,dw_congress,dw_state, cf_congress);

Y_bonica1=libs_bonica1;
X_bonica1=[c,stfracmm,votlib2,blackpop,hispop,demger,libslag_bonica1];

Y_bonica1 = Y_bonica1(south_sample,:);
X_bonica1 = X_bonica1(south_sample,:);

[b_bonica1,se_bonica1,tstat_bonica1]=ols_weighted(X_bonica1,Y_bonica1,w);

    % Create share of liberals, current and lagged, not with CONSTANT Bonica CF scores, setup Shotts original regression variables;
[libs_bonica2,libslag_bonica2]=create_liberal(reg_congress,reg_state,dw_congress,dw_state,cf_constant);

Y_bonica2=libs_bonica2;
X_bonica2=[c,stfracmm,votlib2,blackpop,hispop,demger,libslag_bonica2];

Y_bonica2 = Y_bonica2(south_sample,:);
X_bonica2 = X_bonica2(south_sample,:);

[b_bonica2,se_bonica2,tstat_bonica2]=ols_weighted(X_bonica2,Y_bonica2,w);


    % Create significance stars for table display;
star_bonica1=cellstr(' ');
star_bonica2=cellstr(' ');

for i=1:k;
    if abs(tstat_bonica1(i))> norminv(0.95,0,1);
        star_bonica1(i)=cellstr('*');
    end
    
    if abs(tstat_bonica1(i))> norminv(0.975,0,1);
        star_bonica1(i)=cellstr('**');
    end
    
    if abs(tstat_bonica1(i))> norminv(0.995,0,1);
        star_bonica1(i)=cellstr('***');
    end
end

for i=1:k;
    if abs(tstat_bonica2(i))> norminv(0.95,0,1);
        star_bonica2(i)=cellstr('*');
    end
    
    if abs(tstat_bonica2(i))> norminv(0.975,0,1);
        star_bonica2(i)=cellstr('**');
    end
    
    if abs(tstat_bonica2(i))> norminv(0.995,0,1);
        star_bonica2(i)=cellstr('***');
    end
end

star_bonica1=star_bonica1';
star_bonica2=star_bonica2';

    %Format table 1.4. for paper;
rownames_table1_4={'Intercept'; 'Fraction Majority-Minority'; 'Unified Democratic Control'; 'Voter Liberalism'; '\% Black'; '\% Latino'; 'Lagged Left of Median'};


Bonica1Coeff=strcat(num2str(round(b_bonica1,4)),star_bonica1);
Bonica1SE=round(se_bonica1,4);
Bonica2Coeff=strcat(num2str(round(b_bonica2,4)),star_bonica2);
Bonica2SE=round(se_bonica2,4);


table_temp=table(OriginalCoeff, OriginalSE ,Bonica1Coeff,Bonica1SE,Bonica2Coeff, Bonica2SE, 'RowNames', rownames_table1_4);
table1_4=[table_temp(2:k,:);table_temp(1,:)];
writetable(table1_4,'table1_4.csv','Delimiter',',', 'WriteRowNames',true)









 
 